Part 3: PCA Feature Components#

This notebook performs PCA to further understand the weather narrative files. Code is largely based on EDA (exploratory data analysis) methods discussed in class.

# data wrangling -- typical packages
import pandas as pd
import numpy as np
import re


# data viz
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as pyo 
import seaborn as sns
from matplotlib import pyplot as plt
pyo.init_notebook_mode() ## ensures that the plotly graphics convert to HTML
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt


# tokenizing
from glob import glob
import nltk

# distance computing
from numpy.linalg import norm
from scipy.spatial.distance import pdist, squareform

# pca stuff
from scipy.linalg import norm, eigh
from sklearn.decomposition import PCA
# OHCO: Ordered Hierarchy of Content Objects -- the index levels of the
# token table, from coarsest (event type label) down to individual token.
OHCO = ["EVENT_Label","EVENT_ID", "para_num", "sent_num", "token_num"] # event ID is the chapter number

# Bags: truncations of the OHCO used as "document" units when computing TFIDF.
SENTS = OHCO[:4]       # sentence-level bag
PARAS = OHCO[:3]       # paragraph-level bag
EVENT_ID = OHCO[:2]    # single-event bag (the one actually used below)
EVENT_TYPE = OHCO[:1]  # event-type bag
# Reading the data.
# NOTE(review): hardcoded absolute local path -- breaks for any other user or
# machine; consider a configurable DATA_DIR (pathlib.Path) instead.
path = "/Users/prabh/Desktop/Portfolio/mynewbook/data/"

LIB = pd.read_csv(path+"LIB2020.csv")                             # event-level metadata (EVENT_ID, STATE, ...)
LIB_Types = pd.read_csv(path+"LIB_TYPES2020.csv", index_col=[0])  # EVENT_Label -> EVENT_TYPE lookup table
TOKEN = pd.read_csv(path+"TOKEN2020.csv").set_index(OHCO)         # token table indexed by the full OHCO
VOCAB = pd.read_csv(path+"VOCAB2020.csv").set_index("term_str")   # vocabulary keyed by term string

Create Two Separate LIBs to Diagnose Principal Components#

# Build plain-dict lookups for later `.map()` calls:
#   state_types: EVENT_ID   -> STATE
#   event_types: EVENT_Label -> EVENT_TYPE
state_series = LIB.set_index("EVENT_ID")["STATE"]
state_types = state_series.to_dict()
type_series = LIB_Types.set_index("EVENT_Label")["EVENT_TYPE"]
event_types = type_series.to_dict()
event_types
{1: 'Dense Fog',
 2: 'High Wind',
 3: 'Flood',
 4: 'Winter Storm',
 5: 'Heavy Snow',
 6: 'Winter Weather',
 7: 'Thunderstorm Wind',
 8: 'Strong Wind',
 9: 'Blizzard',
 10: 'Marine Thunderstorm Wind',
 11: 'Heavy Rain',
 12: 'Lakeshore Flood',
 13: 'Tornado',
 14: 'Ice Storm',
 15: 'High Surf',
 16: 'Flash Flood',
 17: 'Lightning',
 18: 'Extreme Cold/Wind Chill',
 19: 'Hail',
 20: 'Avalanche',
 21: 'Drought',
 22: 'Frost/Freeze',
 23: 'Debris Flow',
 24: 'Rip Current',
 25: 'Lake-Effect Snow',
 26: 'Marine High Wind',
 27: 'Sleet',
 28: 'Cold/Wind Chill',
 29: 'Coastal Flood',
 30: 'Dust Storm',
 31: 'Waterspout',
 32: 'Astronomical Low Tide',
 33: 'Wildfire',
 34: 'Funnel Cloud',
 35: 'Freezing Fog',
 36: 'Dust Devil',
 37: 'Marine Hail',
 38: 'Heat',
 39: 'Excessive Heat',
 40: 'Marine Strong Wind',
 41: 'Marine Dense Fog',
 42: 'Tropical Depression',
 43: 'Tropical Storm',
 44: 'Marine Tropical Storm',
 45: 'Storm Surge/Tide',
 46: 'Hurricane',
 47: 'Marine Hurricane/Typhoon',
 48: 'Dense Smoke',
 49: 'Marine Tropical Depression',
 50: 'Seiche'}
# Sanity-check the event-type lookup table (EVENT_Label -> EVENT_TYPE).
# Earlier draft subset kept for reference:
# LIB_Type = LIB_Type[["EVENT_label", "EVENT_TYPE"]]
# LIB_Type
LIB_Types
EVENT_Label EVENT_TYPE
0 1 Dense Fog
1 2 High Wind
2 3 Flood
3 4 Winter Storm
4 5 Heavy Snow
5 6 Winter Weather
6 7 Thunderstorm Wind
7 8 Strong Wind
8 9 Blizzard
9 10 Marine Thunderstorm Wind
10 11 Heavy Rain
11 12 Lakeshore Flood
12 13 Tornado
13 14 Ice Storm
14 15 High Surf
15 16 Flash Flood
16 17 Lightning
17 18 Extreme Cold/Wind Chill
18 19 Hail
19 20 Avalanche
20 21 Drought
21 22 Frost/Freeze
22 23 Debris Flow
23 24 Rip Current
24 25 Lake-Effect Snow
25 26 Marine High Wind
26 27 Sleet
27 28 Cold/Wind Chill
28 29 Coastal Flood
29 30 Dust Storm
30 31 Waterspout
31 32 Astronomical Low Tide
32 33 Wildfire
33 34 Funnel Cloud
34 35 Freezing Fog
35 36 Dust Devil
36 37 Marine Hail
37 38 Heat
38 39 Excessive Heat
39 40 Marine Strong Wind
40 41 Marine Dense Fog
41 42 Tropical Depression
42 43 Tropical Storm
43 44 Marine Tropical Storm
44 45 Storm Surge/Tide
45 46 Hurricane
46 47 Marine Hurricane/Typhoon
47 48 Dense Smoke
48 49 Marine Tropical Depression
49 50 Seiche
# Checking TOKEN: one row per token, indexed by the full OHCO hierarchy.
TOKEN.head()
pos_tuple pos token_str term_str
EVENT_Label EVENT_ID para_num sent_num token_num
1 863438 0 0 0 ('Visibility', 'NNP') NNP Visibility visibility
1 ('below', 'IN') IN below below
2 ('a', 'DT') DT a a
3 ('quarter', 'NN') NN quarter quarter
4 ('mile', 'NN') NN mile mile
# Checking VOCAB: per-term statistics keyed by term string (note the NaN
# index row visible in the output -- removed in the next cell).
VOCAB.head()
term_id n num stop p_stem pos_max
term_str
NaN 0 355 0 0 NaN :
0 1 17 1 0 0 CD
000 2 1 1 0 000 CD
0000 3 11 1 0 0000 CD
0000cst 4 4 1 0 0000cst CD

Vector Space Models #

# Drop the vocabulary rows whose term string is missing (NaN index labels),
# so every remaining row is keyed by a real term.
VOCAB = VOCAB.loc[VOCAB.index.notna()]
VOCAB
term_id n num stop p_stem pos_max
term_str
0 1 17 1 0 0 CD
000 2 1 1 0 000 CD
0000 3 11 1 0 0000 CD
0000cst 4 4 1 0 0000cst CD
0001 5 6 1 0 0001 CD
... ... ... ... ... ... ...
zoologico 28971 1 0 0 zoologico NNP
zortman 28972 11 0 0 zortman NNP
zucksville 28973 1 0 0 zucksvil NNP
zumbrunn 28974 1 0 0 zumbrunn NNP
zuni 28975 1 0 0 zuni NNP

28975 rows × 6 columns

# TFIDF Function
def get_vocab(tokens, vocab, bag, tf_type='n', item_type='term_str', alpha=.4, new_col_suffix=''):
    """Build a bag-of-words table and attach TFIDF statistics to the vocabulary.

    Parameters
    ----------
    tokens : DataFrame containing an `item_type` column (the token table).
    vocab : DataFrame indexed by `item_type`. Mutated IN PLACE -- 'df', 'idf'
        and the aggregate TFIDF column are added -- and also returned.
    bag : list of index/column names defining the document unit (e.g. EVENT_ID).
    tf_type : TF weighting scheme; one of
        'n', 'sum', 'l2', 'max', 'log', 'sub', 'bool', 'bool2'.
    alpha : smoothing constant for the 'max' scheme.
    new_col_suffix : suffix appended to the aggregate 'tfidf_sum' column name.

    Returns
    -------
    (vocab, BOW) : the augmented vocabulary and the bag-of-words table.

    Raises
    ------
    ValueError : if `tf_type` is not one of the supported schemes
        (previously an unknown scheme silently left 'tf' unset).
    """

    # Create BOW: term counts per document bag (MultiIndex: bag + item_type).
    BOW = tokens.groupby(bag+[item_type])[item_type].count()\
        .to_frame('n')
    BOW['c'] = 1  # binary presence indicator

    # Compute TF.
    # NOTE: the per-group `D.apply(lambda x: ...)` forms used previously were
    # extremely slow on large corpora (a full run had to be interrupted);
    # the groupby-`transform` forms below are vectorized and produce
    # identical values.
    D = BOW.groupby(bag).n
    if tf_type == 'n':
        BOW['tf'] = BOW.n
    elif tf_type == 'sum':
        BOW['tf'] = BOW.n / D.transform('sum')  # tf = P(w|d)
    elif tf_type == 'l2':
        BOW['tf'] = BOW.n / np.sqrt((BOW.n ** 2).groupby(bag).transform('sum'))
    elif tf_type == 'max':
        BOW['tf'] = alpha + (1 - alpha) * (BOW.n / D.transform('max'))
    elif tf_type == 'log':
        BOW['tf'] = np.log2(1 + BOW.n)
    elif tf_type == 'sub':
        BOW['tf'] = 1 + np.log2(BOW.n)
    elif tf_type == 'bool':
        BOW['tf'] = BOW.c
    elif tf_type == 'bool2':
        BOW['tf'] = 1 / D.transform('count')
    else:
        raise ValueError("Unknown tf_type: {!r}".format(tf_type))

    # Compute IDF: df = number of bags each item appears in.
    # (Fixed: previously grouped by the hardcoded 'term_str' instead of
    # `item_type`, which broke any non-default item_type such as stems.)
    vocab['df'] = BOW.groupby(item_type).n.count()
    N_docs = D.ngroups
    vocab['idf'] = np.log2(N_docs/vocab.df)

    # Compute TFIDF (the multiply aligns on the shared item_type index level).
    BOW['tfidf'] = BOW.tf * vocab.idf

    # Compute aggregate TFIDF per item, stored on the vocabulary.
    col = 'tfidf_sum' + new_col_suffix
    vocab[col] = BOW.groupby(item_type)['tfidf'].sum()

    return vocab, BOW
def get_tfidf(TOKEN, bag, count_method='n', tf_method='sum', item_type='term_str', alpha=0.4):
    """Return a dense document-term TFIDF matrix for the given bag level.

    Parameters
    ----------
    TOKEN : token table with an `item_type` column.
    bag : list of index/column names defining the document unit.
    count_method : 'n' (raw counts) or 'c' (binary presence).
    tf_method : 'sum', 'max', 'log', 'raw' or 'binary'.
    item_type : column holding the term (or stem) strings.
    alpha : smoothing constant for the 'max' scheme (was hardcoded to 0.4;
        the default preserves the original behavior).

    Returns
    -------
    DataFrame of TFIDF weights, documents as rows and terms as columns.

    Raises
    ------
    ValueError : if `tf_method` is unknown (previously this fell through and
        raised a confusing NameError on `TF`).
    """
    # Create bag of items (terms or stems)
    BOW = TOKEN.groupby(bag+[item_type])[item_type].count()\
        .to_frame().rename(columns={item_type:'n'})

    # Add binary count column
    BOW['c'] = BOW.n.astype('bool').astype('int')

    # Create document-term count matrix (docs x terms, zero-filled)
    DTCM = BOW[count_method].unstack().fillna(0).astype('int')

    # Compute TF (operating on DTCM.T so column-wise ops are per document)
    if tf_method == 'sum':
        TF = DTCM.T / DTCM.T.sum()
    elif tf_method == 'max':
        TF = alpha + (1 - alpha) * (DTCM.T / DTCM.T.max())  # See Manning, et al. for choice of alpha
    elif tf_method == 'log':
        TF = np.log10(DTCM.T + 1)
    elif tf_method == 'raw':
        TF = DTCM.T
    elif tf_method == 'binary':
        TF = DTCM.T.astype('bool').astype('int')
    else:
        raise ValueError("Unknown tf_method: {!r}".format(tf_method))

    # Compute IDF: log10(N / document frequency)
    N = DTCM.shape[0]
    DF = DTCM[DTCM > 0].count()
    IDF = np.log10(N / DF)

    TFIDF = TF.T * IDF

    return TFIDF
def get_pca(X, n_comps=10):
    """PCA via eigendecomposition of the covariance matrix of X.

    Parameters
    ----------
    X : document-term matrix (documents x terms); here TFIDF_SHORT.
    n_comps : number of top components to keep.

    Returns
    -------
    EIG_PAIRS : eigenvalues joined with eigenvectors plus 'exp_var' (% variance).
    COMPS : the top `n_comps` components, labeled PC0..PC{n_comps-1}.
    LOADINGS : term loadings (terms x components).
    DCM : document-component matrix (projection of TFIDF_SHORT onto COMPS).

    NOTE(review): besides X, this function reads the module-level globals
    VSHORT (X's columns must equal VSHORT.index) and TFIDF_SHORT (used for
    the projection instead of X) -- confirm both are set before calling.
    """
    
    global VSHORT

    # Covariance of the term columns (terms x terms, symmetric).
    COV = X.cov()

    # eigh: eigendecomposition for symmetric matrices; eigenvalues ascending.
    eig_vals, eig_vecs = eigh(COV)
    EIG_VEC = pd.DataFrame(eig_vecs, index=VSHORT.index, columns=VSHORT.index)
    EIG_VAL = pd.DataFrame(eig_vals, index=VSHORT.index, columns=['eig_val'])
    EIG_VAL.index.name = 'term_id'  # NOTE(review): the index actually holds term strings
    EIG_PAIRS = EIG_VAL.join(EIG_VEC.T)
    # Percent of total variance explained by each eigenvalue, 2 decimals.
    EIG_PAIRS['exp_var'] = np.round((EIG_PAIRS.eig_val / EIG_PAIRS.eig_val.sum()) * 100, 2)

    # Keep the n_comps largest components and relabel them PC0, PC1, ...
    COMPS = EIG_PAIRS.sort_values('exp_var', ascending=False).head(n_comps).reset_index(drop=True)
    COMPS.index.name = 'comp_id'
    COMPS.index = ["PC{}".format(i) for i in COMPS.index.tolist()]

    # Term loadings: terms as rows, components as columns.
    LOADINGS = COMPS[VSHORT.index].T
    LOADINGS.index.name = 'term_str'

    # Project documents onto the components (documents x components).
    DCM = TFIDF_SHORT.dot(COMPS[VSHORT.index].T)
#     DCM = DCM.join(LIB[['author','genre']], on='book')
#     DCM['title'] = DCM.apply(lambda x: x.name[0] + ', ch '+ str(x.name[1]), 1)
    
    return EIG_PAIRS, COMPS, LOADINGS, DCM
    
def vis_pcs(M, a, b, label="EVENT_TYPE", prefix='PC', symbol=None):
    """Interactive scatter of two principal-component columns of M.

    M must contain the columns '{prefix}{a}', '{prefix}{b}', the `label`
    column used for color, and 'EVENT_TYPE' for hover text.
    """
    x_axis = prefix + str(a)
    y_axis = prefix + str(b)
    fig = px.scatter(
        M,
        x_axis,
        y_axis,
        color=label,
        hover_name='EVENT_TYPE',
        symbol=symbol,
        height=1000,
    )
    fig.show()
# Analysis configuration.
max_terms = 4000  # number of top-TFIDF terms kept for PCA (size of VSHORT)
tf_method = 'max' # sum, max, raw, bool, bool_raw
global_term_sig = 'tfidf_sum' # tfidf_sum, dfidf  -- NOTE(review): appears unused below; verify
n_comps = 10  # number of principal components to extract

EVENT ID Bag#

# TFIDF with the event-ID bag; alpha=0 reduces the 'max' scheme to x/x.max().
# NOTE: get_vocab mutates VOCAB in place, so VOCAB2 is the same object as VOCAB.
VOCAB2, BOW = get_vocab(TOKEN, VOCAB, bag=EVENT_ID, tf_type='max', new_col_suffix='_event_max', alpha=0)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-16-35c350db7783> in <module>
      1 # tf-idf with event id bag
----> 2 VOCAB2, BOW = get_vocab(TOKEN, VOCAB, bag=EVENT_ID, tf_type='max', new_col_suffix='_event_max', alpha=0)

<ipython-input-11-7a7958747066> in get_vocab(tokens, vocab, bag, tf_type, item_type, alpha, new_col_suffix)
     16         BOW['tf'] = D.apply(lambda x: x / np.sqrt((x**2).sum()))
     17     elif tf_type == 'max':
---> 18         BOW['tf'] = D.apply(lambda x: alpha + (1-alpha) * (x / x.max()))
     19     elif tf_type == 'log':
     20         BOW['tf'] = D.apply(lambda x: np.log2(1 + x))

~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in apply(self, func, *args, **kwargs)
    219     )
    220     def apply(self, func, *args, **kwargs):
--> 221         return super().apply(func, *args, **kwargs)
    222 
    223     @doc(_agg_template, examples=_agg_examples_doc, klass="Series")

~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
    892         with option_context("mode.chained_assignment", None):
    893             try:
--> 894                 result = self._python_apply_general(f, self._selected_obj)
    895             except TypeError:
    896                 # gh-20949

~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
    926             data after applying f
    927         """
--> 928         keys, values, mutated = self.grouper.apply(f, data, self.axis)
    929 
    930         return self._wrap_applied_output(

~\anaconda3\lib\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
    236             # group might be modified
    237             group_axes = group.axes
--> 238             res = f(group)
    239             if not _is_indexed_like(res, group_axes, axis):
    240                 mutated = True

<ipython-input-11-7a7958747066> in <lambda>(x)
     16         BOW['tf'] = D.apply(lambda x: x / np.sqrt((x**2).sum()))
     17     elif tf_type == 'max':
---> 18         BOW['tf'] = D.apply(lambda x: alpha + (1-alpha) * (x / x.max()))
     19     elif tf_type == 'log':
     20         BOW['tf'] = D.apply(lambda x: np.log2(1 + x))

~\anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
     63         other = item_from_zerodim(other)
     64 
---> 65         return method(self, other)
     66 
     67     return new_method

~\anaconda3\lib\site-packages\pandas\core\arraylike.py in __truediv__(self, other)
    111     @unpack_zerodim_and_defer("__truediv__")
    112     def __truediv__(self, other):
--> 113         return self._arith_method(other, operator.truediv)
    114 
    115     @unpack_zerodim_and_defer("__rtruediv__")

~\anaconda3\lib\site-packages\pandas\core\series.py in _arith_method(self, other, op)
   4996         lvalues = extract_array(self, extract_numpy=True)
   4997         rvalues = extract_array(other, extract_numpy=True)
-> 4998         result = ops.arithmetic_op(lvalues, rvalues, op)
   4999 
   5000         return self._construct_result(result, name=res_name)

~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in arithmetic_op(left, right, op)
    187     else:
    188         with np.errstate(all="ignore"):
--> 189             res_values = _na_arithmetic_op(lvalues, rvalues, op)
    190 
    191     return res_values

~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in _na_arithmetic_op(left, right, op, is_cmp)
    140 
    141     try:
--> 142         result = expressions.evaluate(op, left, right)
    143     except TypeError:
    144         if is_cmp:

~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, a, b, use_numexpr)
    233         if use_numexpr:
    234             # error: "None" not callable
--> 235             return _evaluate(op, op_str, a, b)  # type: ignore[misc]
    236     return _evaluate_standard(op, op_str, a, b)
    237 

~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
    118 
    119     if result is None:
--> 120         result = _evaluate_standard(op, op_str, a, b)
    121 
    122     return result

~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b)
     67         _store_test_result(False)
     68     with np.errstate(all="ignore"):
---> 69         return op(a, b)
     70 
     71 

KeyboardInterrupt: 
# Inspect the augmented vocabulary (same object as VOCAB after the in-place update).
VOCAB2
term_id n num stop p_stem pos_max df idf tfidf_sum_event_max
term_str
0 1 17 1 0 0 CD 17 11.412819 68.476913
000 2 1 1 0 000 CD 1 15.500282 1.409117
0000 3 11 1 0 0000 CD 11 12.040850 88.012880
0000cst 4 4 1 0 0000cst CD 4 13.500282 15.187817
0001 5 6 1 0 0001 CD 6 12.915319 30.135745
... ... ... ... ... ... ... ... ... ...
zoologico 28971 1 0 0 zoologico NNP 1 15.500282 15.500282
zortman 28972 11 0 0 zortman NNP 11 12.040850 81.198553
zucksville 28973 1 0 0 zucksvil NNP 1 15.500282 15.500282
zumbrunn 28974 1 0 0 zumbrunn NNP 1 15.500282 7.750141
zuni 28975 1 0 0 zuni NNP 1 15.500282 3.100056

28975 rows × 9 columns

Saving Files#

# Persist the augmented vocabulary (VOCAB carries the df/idf/tfidf_sum columns
# because get_vocab mutated it in place) and the bag-of-words table.
VOCAB.to_csv("2020vocab_tfidf.csv")
BOW.to_csv("2020_BOW.csv")
# Pivot BOW into a dense document-term TFIDF matrix (docs x terms).
TFIDF = BOW.tfidf.unstack(fill_value=0)
TFIDF
term_str 0 000 0000 0000cst 0001 0002 0006 001 0010 001010 ... zoie zollinger zone zones zoo zoologico zortman zucksville zumbrunn zuni
EVENT_Label EVENT_ID
1 863438 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
863439 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
863440 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
863441 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
863450 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
49 924958 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
50 919803 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
920755 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
922117 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
922118 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

46350 rows × 28975 columns

# Keep only the `max_terms` most significant terms, then run PCA.
# NOTE: get_pca reads the globals VSHORT and TFIDF_SHORT, so both must be
# assigned before the call.
VSHORT = VOCAB.sort_values("tfidf_sum_event_max", ascending=False).head(max_terms)
TFIDF_SHORT = TFIDF[VSHORT.index]
EIG_PAIRS, COMPS, LOADINGS, DCM = get_pca(TFIDF_SHORT)
# Visualizing the explained variance per component.
COMPS.exp_var.sort_values().plot.barh();
_images/5922d5ff1492d88b75be3fd8e72e45e00a578452b820d8fd736a3ab088699519.png
# Flatten the (EVENT_Label, EVENT_ID) index into columns and attach the
# human-readable event type via the lookup dict built earlier.
DCM = DCM.reset_index()
DCM["EVENT_TYPE"] = DCM.EVENT_Label.map(event_types)
# Checking the DCM table.
DCM
EVENT_Label EVENT_ID PC0 PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 EVENT_TYPE
0 1 863438 -0.097507 -1.612673 0.979496 -0.396077 -0.064989 -0.079892 -0.049508 -0.097036 -0.079670 -0.072918 Dense Fog
1 1 863439 0.170473 -0.614277 -0.031911 0.098178 -0.445449 -0.199647 -0.001389 -0.263082 -0.241475 -0.012642 Dense Fog
2 1 863440 -0.096439 -1.609360 0.974462 -0.392756 -0.065251 -0.082850 -0.047805 -0.099461 -0.081062 -0.070115 Dense Fog
3 1 863441 0.270920 -0.975679 0.157993 -0.237277 -0.697348 -0.078174 -0.046294 -0.218459 -0.483641 0.043804 Dense Fog
4 1 863450 0.128880 -0.928709 0.312683 -0.043604 -0.053874 -0.232403 0.109937 -0.296813 -0.048831 0.076559 Dense Fog
... ... ... ... ... ... ... ... ... ... ... ... ... ...
46345 49 924958 -2.531190 1.487574 0.061748 0.346133 -0.328712 -0.343993 0.136372 -0.416950 0.532895 -0.282943 Marine Tropical Depression
46346 50 919803 -0.191457 -0.120002 -0.683185 0.621446 -0.548783 -0.188204 0.387929 -0.246870 0.251690 0.595278 Seiche
46347 50 920755 -0.867832 0.266494 -0.284849 0.279768 -0.355512 0.034088 0.483872 -0.257506 -0.266096 0.621381 Seiche
46348 50 922117 -0.206966 -0.013357 -0.361326 0.268605 -0.207056 -0.051283 0.318997 -0.279755 0.041178 0.357827 Seiche
46349 50 922118 -0.191457 -0.120002 -0.683185 0.621446 -0.548783 -0.188204 0.387929 -0.246870 0.251690 0.595278 Seiche

46350 rows × 13 columns

Saving the DCM Table#

# NOTE(review): this reloads the DCM from disk, but the matching write
# (DCM.to_csv("2020DCM.csv")) is commented out at the end of the notebook --
# a fresh Restart-and-Run-All will fail here unless the file was saved in a
# previous session. Consider saving before reading, or skipping the reload.
DCM = pd.read_csv("2020DCM.csv")
### Visualizing the Principal Components
vis_pcs(DCM, 0, 1)
# Attach the state name for each event via the EVENT_ID -> STATE lookup.
DCM["STATE"] = DCM.EVENT_ID.map(state_types)
DCM
EVENT_Label EVENT_ID PC0 PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 EVENT_TYPE STATE
0 1 863438 -0.097507 -1.612673 0.979496 -0.396077 -0.064989 -0.079892 -0.049508 -0.097036 -0.079670 -0.072918 Dense Fog CALIFORNIA
1 1 863439 0.170473 -0.614277 -0.031911 0.098178 -0.445449 -0.199647 -0.001389 -0.263082 -0.241475 -0.012642 Dense Fog CALIFORNIA
2 1 863440 -0.096439 -1.609360 0.974462 -0.392756 -0.065251 -0.082850 -0.047805 -0.099461 -0.081062 -0.070115 Dense Fog CALIFORNIA
3 1 863441 0.270920 -0.975679 0.157993 -0.237277 -0.697348 -0.078174 -0.046294 -0.218459 -0.483641 0.043804 Dense Fog CALIFORNIA
4 1 863450 0.128880 -0.928709 0.312683 -0.043604 -0.053874 -0.232403 0.109937 -0.296813 -0.048831 0.076559 Dense Fog CALIFORNIA
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
46345 49 924958 -2.531190 1.487574 0.061748 0.346133 -0.328712 -0.343993 0.136372 -0.416950 0.532895 -0.282943 Marine Tropical Depression ATLANTIC SOUTH
46346 50 919803 -0.191457 -0.120002 -0.683185 0.621446 -0.548783 -0.188204 0.387929 -0.246870 0.251690 0.595278 Seiche MICHIGAN
46347 50 920755 -0.867832 0.266494 -0.284849 0.279768 -0.355512 0.034088 0.483872 -0.257506 -0.266096 0.621381 Seiche NEW YORK
46348 50 922117 -0.206966 -0.013357 -0.361326 0.268605 -0.207056 -0.051283 0.318997 -0.279755 0.041178 0.357827 Seiche INDIANA
46349 50 922118 -0.191457 -0.120002 -0.683185 0.621446 -0.548783 -0.188204 0.387929 -0.246870 0.251690 0.595278 Seiche MICHIGAN

46350 rows × 14 columns

# Additional component views, left commented out to keep the rendered
# notebook compact. Uncomment individual lines to explore other PC pairs,
# the 3-D scatter, or to persist the DCM table (see the reload note above).
# vis_pcs(DCM, 0, 1, label="STATE")
# vis_pcs(DCM, 1, 2)
# vis_pcs(DCM, 1, 2, label="STATE")
# vis_pcs(DCM, 2, 3)
# vis_pcs(DCM, 3, 4, label="STATE")
# vis_pcs(DCM, 4, 5, label="STATE")
# vis_pcs(DCM, 5, 6, label="STATE")
# vis_pcs(DCM, 6, 7, label="STATE")
# vis_pcs(DCM, 7, 8, label="STATE")
# px.scatter_3d(DCM, 'PC0', 'PC1', 'PC2', color='STATE', height=1000, hover_name='EVENT_TYPE',
#                         hover_data=['STATE'])
# DCM.to_csv("2020DCM.csv")